*Program 1: First DATA step (p.9);
/* Build the data set first_data with a single numeric variable, y,
   read by list input from the six in-stream records that follow. */
DATA first_data;
    INPUT y;
    CARDS;
2
5
9
9
10
11
;
RUN;


*Program 2: Printing the data file (p.15);
/* List the observations of first_data. The argument-less TITLE
   statement cancels any title lines that are currently in effect. */
PROC PRINT DATA=first_data;
    TITLE;
RUN;


*Program 3: Using PROC UNIVARIATE (p.16);
/* Descriptive statistics for y in first_data. The PLOT option asks
   the procedure for its plots in addition to the tables. */
PROC UNIVARIATE DATA=first_data PLOT;
    VAR y;
RUN;


*Program 4(a): Missing semicolons in Program 3 (p.19);
/* Copy of Program 3 in which the PROC UNIVARIATE statement has no
   terminating semicolon, so SAS reads the PROC line and the VAR line
   as one statement. Per the heading this omission is the point of the
   example (it shows what the log reports) - leave the code as written. */
proc univariate plot data=first_data
var y;
run;


*Program 4(b): Missing semicolons in Program 3 (p.21);
/* Copy of Program 3 in which the VAR statement has no terminating
   semicolon, so SAS reads "var y run" as a single statement and the
   step is left without a RUN statement of its own. Per the heading
   this omission is the point of the example - leave the code as written. */
proc univariate plot data=first_data;
var y
run;


*Program 5: Reading a delimited file (p.22);
/* Create first_data2 from an external text file. List input with the
   default delimiter reads the first value on each record into the
   numeric variable y. */
DATA first_data2;
    INFILE 'C:\Documents\SAS workshop\MyData\datafile.txt';
    INPUT y;
RUN;


*Program 6: Reading tab delimited file (p.25);
/* Create pulse from a tab-delimited text file.
   DLM='09'x  - the delimiter is the tab character (hex 09).
   FIRSTOBS=2 - reading starts at the second record of the file.
   Gender is read as a character variable, all others as numeric. */
DATA pulse;
    INFILE 'C:\Users\sjricht2\Box Sync\Consulting\Workshops\SAS Workshop\data.txt'
           DLM='09'x FIRSTOBS=2;
    INPUT Height Weight Age Gender $
          Smokes Alcohol Exercise Ran
          Pulse1 Pulse2 Year;
RUN;


*Program 7: Reading a csv delimited file (p.26);
/* Create pulse from a comma-separated file (this replaces any pulse
   data set created earlier in the session).
   DSD        - treat two consecutive commas as a missing value and
                honour quoted fields. Without it, list input collapses
                adjacent delimiters and every later value on the record
                shifts into the wrong variable.
   DLM=','    - the delimiter is a comma.
   FIRSTOBS=2 - reading starts at the second record of the file.
   TRUNCOVER  - a record with too few fields leaves the remaining
                variables missing instead of pulling values from the
                next record.
   Gender is read as a character variable, all others as numeric. */
data pulse;
    infile 'C:\Documents\SAS workshop\MyData\data.csv'
           dsd dlm=',' firstobs=2 truncover;
    input Height Weight Age Gender $
          Smokes Alcohol Exercise Ran
          Pulse1 Pulse2 Year;
run;


*Program 8: Exploring the details of a data file (p.33);
/* Report the descriptor information (variables, types, lengths,
   observation count) stored for the pulse data set. */
PROC CONTENTS DATA=pulse;
RUN;


*Program 9(a): Explore the relationship between Pulse1 and Weight (p.35);
/* Scatter plot with pulse1 on the vertical axis and weight on the
   horizontal axis. */
PROC SGSCATTER DATA=pulse;
    PLOT pulse1 * weight;
RUN;


*Program 9(b): Add regression line (p.36);
/* Same scatter plot as Program 9(a), with the REG option adding a
   fitted regression line to the panel. */
PROC SGSCATTER DATA=pulse;
    PLOT pulse1 * weight / REG;
RUN;


*Program 9(c): Simple linear regression of Pulse1 on Weight (p.37);
/* Fit pulse1 = b0 + b1*weight by least squares.
   PROC REG is an interactive procedure: RUN executes the statements
   submitted so far but leaves the procedure active. QUIT ends it
   explicitly so the step is finished when this program is submitted
   on its own. */
proc reg data=pulse;
    model pulse1 = weight;
run;
quit;


*Program 9(d): Add residual plot (p.43);
/* Refit the regression of pulse1 on weight and plot the residuals
   (r.) against the predicted values (p.).
   PROC REG is an interactive procedure: RUN executes the statements
   submitted so far but leaves the procedure active. QUIT ends it
   explicitly so the step is finished when this program is submitted
   on its own. */
proc reg data=pulse;
    model pulse1 = weight;
    plot r.*p.;
run;
quit;


*Program 9(e): Correlation (p.44);
/* Correlation between weight and pulse1. */
PROC CORR DATA=pulse;
    VAR weight pulse1;
RUN;


*Program 10(a): Boxplots of Pulse1 by smoking status (p.45);
/* Box plots of pulse1 for each value of smokes, run against pulse in
   whatever order it currently has. Program 10(b) repeats this step
   after sorting by smokes. */
PROC BOXPLOT DATA=pulse;
    PLOT pulse1 * smokes;
RUN;


*Program 10(b): Boxplots of Pulse1 by smoking status redo (p.46);
/* Sort pulse in place by the grouping variable ... */
PROC SORT DATA=pulse;
    BY smokes;
RUN;

/* ... then draw the box plots of pulse1 for each smokes group. */
PROC BOXPLOT DATA=pulse;
    PLOT pulse1 * smokes;
RUN;


*Program 10(c): Schematic Boxplots (p.47);
/* Define the smokes_label format here: the FORMAT statement in the
   PROC BOXPLOT step below needs it, and no earlier step shown in this
   file creates it (the only other definition comes later, in
   Program 11). Without this definition SAS cannot find the format when
   the step runs. The value labels match those used in Program 11. */
proc format;
    value smokes_label 1 = 'Yes'
                       2 = 'No';
run;

/* Schematic box plots of pulse1 for each smokes group, with the group
   values displayed through the smokes_label format. The input is
   expected to be sorted by smokes already (see Program 10(b)). */
proc boxplot data=pulse;
    plot pulse1*smokes / boxstyle=schematic;
    format smokes smokes_label.;
run;


*Program 10(d): Boxplots using SGPLOT (p.50);
/* Vertical box plots of pulse1, one box per value of smokes. */
PROC SGPLOT DATA=pulse;
    VBOX pulse1 / GROUP=smokes;
RUN;


*Program 11: Creating formats and labels (p.52);
/* Numeric format that displays 1 as Yes and 2 as No. */
PROC FORMAT;
    VALUE smokes_label
        1 = 'Yes'
        2 = 'No';
RUN;

/* Grouped box plots of pulse1 with the smokes values shown through
   smokes_label and descriptive labels attached to both variables
   for this step only. */
PROC SGPLOT DATA=pulse;
    VBOX pulse1 / GROUP=smokes;
    FORMAT smokes smokes_label.;
    LABEL smokes = 'Smoker'
          pulse1 = 'Initial pulse (b/m)';
RUN;


*Program 12(a): Means and standard deviations (p.54);
/* Default PROC MEANS statistics for pulse1 within each level of smokes. */
PROC MEANS DATA=pulse;
    CLASS smokes;
    VAR   pulse1;
RUN;


*Program 12(b): Means and standard deviations (p.54);
/* As Program 12(a), but restricted to the count, mean and standard
   deviation of pulse1 within each level of smokes. */
PROC MEANS DATA=pulse N MEAN STD;
    CLASS smokes;
    VAR   pulse1;
RUN;


*Program 13: Independent samples t-test (p.56);
/* Two-sample t-test comparing pulse1 between the groups defined by smokes. */
PROC TTEST DATA=pulse;
    CLASS smokes;
    VAR   pulse1;
RUN;


*Program 14(a): Frequency table (p.58);
/* One-way frequency table of ran. */
PROC FREQ DATA=pulse;
    TABLES ran;
RUN;


*Program 14(b): Frequency table, test for proportions (p.59);
/* One-way frequency table of ran, with the BINOMIAL option adding the
   binomial proportion statistics and test. */
PROC FREQ DATA=pulse;
    TABLES ran / BINOMIAL;
RUN;


*Program 15(a): Crosstabs (p.61);
/* Two-way table with year as the row variable and ran as the column
   variable. */
PROC FREQ DATA=pulse;
    TABLES year * ran;
RUN;


*Program 15(b): Crosstabs, chi-squared tests (p.63);
/* The year-by-ran table from Program 15(a), with the CHISQ option
   adding chi-square tests of association. */
PROC FREQ DATA=pulse;
    TABLES year * ran / CHISQ;
RUN;


*Program 16(a): Frequency plots (p.66);
/* Frequency table and frequency plot of ran, using only the
   observations where year equals 93. */
PROC FREQ DATA=pulse;
    WHERE year = 93;
    TABLES ran / PLOTS=FREQPLOT;
RUN;


*Program 16(b): Two-way bar charts (p.69);
/* Year-by-ran table with a frequency plot laid out as clustered bars. */
PROC FREQ DATA=pulse;
    TABLES year * ran / PLOTS=FREQPLOT(TWOWAY=CLUSTER);
RUN;


*Program 17(a): Creating a new variable (p.70);
/* Copy pulse to pulse2 and add the one-character flag year93:
   Y when year equals 93, N for every other value of year
   (a missing year therefore also gets N). */
DATA pulse2;
    SET pulse;
    IF year = 93 THEN year93 = 'Y';
    ELSE year93 = 'N';
RUN;

/* Check the new flag with a one-way frequency table. */
PROC FREQ DATA=pulse2;
    TABLES year93;
RUN;


*Program 17(b): Chi-squared tests (p.71);
/* Cross-tabulate the year93 flag against Ran and request chi-square
   tests of association. */
PROC FREQ DATA=pulse2;
    TABLES year93 * Ran / CHISQ;
RUN;


*Program 18: Calculating a difference variable (p.72);
/* Rewrite pulse2 in place with two derived variables:
   pulse_diff - second pulse reading minus the first
   avg_pulse  - mean of the two readings (the MEAN function skips
                missing arguments). */
DATA pulse2;
    SET pulse2;
    pulse_diff = pulse2 - pulse1;
    avg_pulse  = MEAN(OF pulse1 pulse2);
RUN;


*Program 19(a): Means and t-test for dependent samples--MEANS procedure (p.73);
/* For the observations with ran equal to 1, report N, mean, standard
   deviation, confidence limits for the mean, and the t statistic with
   its p-value for both pulse readings and their difference. */
PROC MEANS DATA=pulse2 N MEAN STD LCLM UCLM T PROBT;
    WHERE ran = 1;
    VAR pulse2 pulse1 pulse_diff;
RUN;


*Program 19(b): t-test for dependent samples--TTEST procedure (p.74);
/* Paired t-test on the difference pulse2 minus pulse1, restricted to
   the observations with ran equal to 1. */
PROC TTEST DATA=pulse2;
    WHERE ran = 1;
    PAIRED pulse2 * pulse1;
RUN;